% Load the social graph and determine the first and last dates of observation.
% The load is timed with tic/toc; the elapsed time is printed by toc.
tic;
SPA_Social_graph = dataset('File','SPA_Social_graph.txt', ...
                           'Delimiter','\t', ...
                           'format','%s%s%d%s%s%f%f%f%f%f%f%d%d%d');
toc;

% Both dates are kept as date strings (datestr output), taken from the
% smallest and largest value of the datenumber column.
first_day_of_observations = datestr(min(SPA_Social_graph.datenumber));
last_day_of_observation   = datestr(max(SPA_Social_graph.datenumber));


%clean the social graph and identify the user numbers of ego and alter.

% Relationship list; all six columns are read as text ('%s').
USERREL=dataset('File','USERREL.txt','Delimiter',',','format','%s%s%s%s%s%s');

% The users we consider in our analysis, i.e. they appear in the social
% network, we have running activity data for them and we have identified
% their geographical location.
App_Users_Demographic=dataset('File','App_Users_in_Graph_demographics.txt');

% Convert the UPM user IDs to trimmed strings so that they can be matched
% against the text IDs in USERREL. num2str on a column vector returns a
% padded char matrix, hence the cellstr + strtrim.
% NOTE(review): assumes UPM_USER_ID was read as an integer-valued numeric
% column - confirm against the input file.
upm_id_strings=strtrim(cellstr(num2str(App_Users_Demographic.UPM_USER_ID)));

% Lookup table pairing each user's USER_ID with the UPM ID in string form.
% Row order follows App_Users_Demographic.
App_users_IDS=[];
App_users_IDS.App_PLUS_USER_ID=App_Users_Demographic.USER_ID;
App_users_IDS.UPM_USER_ID=upm_id_strings;
App_users_IDS=struct2dataset(App_users_IDS);


%use only the links where we have information for both ego and alter
ego_is_known=ismember(USERREL.FROMUPMID,App_users_IDS.UPM_USER_ID);
alter_is_known=ismember(USERREL.TOUPMID,App_users_IDS.UPM_USER_ID);
USERREL_USEDFOR_SOCIAL_INFLUENCE=USERREL(ego_is_known & alter_is_known,:);

%use only relationships that were created on or before the last day of
%observation
created_datenumber=USERREL_USEDFOR_SOCIAL_INFLUENCE.created_datenumber;
if iscell(created_datenumber)
    % USERREL is read with an all-'%s' format, so the column holds text;
    % convert it before the numeric comparison. Entries that are not
    % numeric become NaN and are therefore dropped by the filter below.
    % NOTE(review): assumes the text holds serial date numbers - confirm.
    created_datenumber=str2double(created_datenumber);
end
last_datenumber=datenum(last_day_of_observation);
% dataset arrays must be indexed with two subscripts (rows, variables).
USERREL_USEDFOR_SOCIAL_INFLUENCE=USERREL_USEDFOR_SOCIAL_INFLUENCE(created_datenumber<=last_datenumber,:);


%find the usernumber of the ego and alter. The usernumber refers to the
%position of the individual entry in the App_Users_in_Graph_demographics data.
%
%The second output of ismember is, for every relationship row, the index of
%the matching entry in App_users_IDS.UPM_USER_ID (0 when there is no match).

%ego
[~,User_num_in_USERREL_FROMUPMID]=ismember(USERREL_USEDFOR_SOCIAL_INFLUENCE.FROMUPMID,App_users_IDS.UPM_USER_ID);

%alter
[~,User_num_in_USERREL_TOUPMID]=ismember(USERREL_USEDFOR_SOCIAL_INFLUENCE.TOUPMID,App_users_IDS.UPM_USER_ID);

USERREL_USEDFOR_SOCIAL_INFLUENCE.FROM_USER_NUM=User_num_in_USERREL_FROMUPMID;
USERREL_USEDFOR_SOCIAL_INFLUENCE.TO_USER_NUM=User_num_in_USERREL_TOUPMID;

save USERREL_USEDFOR_SOCIAL_INFLUENCE USERREL_USEDFOR_SOCIAL_INFLUENCE




%calculate the correlations in weather between the dyads

load PRECIPITATION_mat 
load TMAX_mat 

%time lagged weather: each matrix is shifted down by 1, 2 or 3 rows and the
%vacated leading rows are filled with NaN, so row r of a lag-n matrix holds
%row r-n of the original.
% NOTE(review): presumably rows are days and columns are users (columns are
% indexed by user number further down) - confirm against how the .mat files
% were built.
n_precipitation_cols=size(PRECIPITATION_mat,2);
PRECIPITATION_mat_t_1=[NaN(1,n_precipitation_cols);PRECIPITATION_mat(1:end-1,:)];
PRECIPITATION_mat_t_2=[NaN(2,n_precipitation_cols);PRECIPITATION_mat(1:end-2,:)];
PRECIPITATION_mat_t_3=[NaN(3,n_precipitation_cols);PRECIPITATION_mat(1:end-3,:)];

%the padding for TMAX uses TMAX_mat's own width rather than PRECIPITATION_mat's
n_tmax_cols=size(TMAX_mat,2);
TMAX_mat_t_1=[NaN(1,n_tmax_cols);TMAX_mat(1:end-1,:)];
TMAX_mat_t_2=[NaN(2,n_tmax_cols);TMAX_mat(1:end-2,:)];
TMAX_mat_t_3=[NaN(3,n_tmax_cols);TMAX_mat(1:end-3,:)];

% Correlation between the ego's weather and the alter's weather for every
% dyad, at three lags of the alter's series: same day, one row earlier and
% two rows earlier. For each dyad the precipitation and TMAX columns are
% stacked into one vector per person, entries where either vector is NaN are
% removed, and the correlation coefficient of the remaining pairs is taken.
% A dyad gets NaN when it has min_valid_samples or fewer usable pairs.
min_valid_samples=20;

n_dyads=size(USERREL_USEDFOR_SOCIAL_INFLUENCE,1);
ego_num=USERREL_USEDFOR_SOCIAL_INFLUENCE.FROM_USER_NUM;
alter_num=USERREL_USEDFOR_SOCIAL_INFLUENCE.TO_USER_NUM;

% Alter-side weather for lag 0, 1 and 2. The lagged matrices are the *_t_1
% and *_t_2 variables built above.
alter_precipitation={PRECIPITATION_mat,PRECIPITATION_mat_t_1,PRECIPITATION_mat_t_2};
alter_tmax={TMAX_mat,TMAX_mat_t_1,TMAX_mat_t_2};
n_lags=numel(alter_precipitation);

weather_corr=NaN(n_dyads,n_lags); % one column per lag
for lag_idx=1:n_lags
    lagged_precipitation=alter_precipitation{lag_idx};
    lagged_tmax=alter_tmax{lag_idx};
    for i=1:n_dyads
        a=[PRECIPITATION_mat(:,ego_num(i));TMAX_mat(:,ego_num(i))];
        b=[lagged_precipitation(:,alter_num(i));lagged_tmax(:,alter_num(i))];
        usable=~(isnan(a) | isnan(b));
        a=a(usable);b=b(usable);
        if length(a)>min_valid_samples
            %correlation coefficient
            weather_corr(i,lag_idx)=(mean(a.*b)-mean(a)*mean(b))/sqrt(mean(a.^2)-mean(a)^2)/sqrt(mean(b.^2)-mean(b)^2);
        end
        if mod(i,1000)==0
            i %progress indicator, printed every 1000 dyads
        end
    end
end

corr_same_day=weather_corr(:,1);
corr_one_day=weather_corr(:,2);
corr_two_day=weather_corr(:,3);



% Attach the three weather correlations to the dyad table as new columns.
USERREL_USEDFOR_SOCIAL_INFLUENCE.weather_corr_same_day = corr_same_day;
USERREL_USEDFOR_SOCIAL_INFLUENCE.weather_corr_one_day  = corr_one_day;
USERREL_USEDFOR_SOCIAL_INFLUENCE.weather_corr_two_day  = corr_two_day;

% Keep a copy under a separate name and write that copy to a MAT-file of the
% same name.
USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations = USERREL_USEDFOR_SOCIAL_INFLUENCE;
save('USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations', ...
     'USERREL_USEDFOR_SOCIAL_INFLUENCE_wth_correlations');

